#################################################################################
# File Name:  	AccessOffGrid.r
# Project:			Aklin and Urpelainen Off-grid project
# Purpose:      R file for figures
# Data input:   Various
# Output File:  na
# Author: 			Michaël Aklin and Johannes Urpelainen
# Date:         12/15/2019
#################################################################################

#################################################################################
# 0.1. Libraries
#################################################################################

rm(list=ls())

# Load libraries
library("ggplot2")
library("googleVis")
library("tidyverse")
library("haven")
library("ggalluvial") # For geom_flows

#################################################################################
# 0.2. Datasets
#################################################################################

# Set wd of data
setwd("PATH_TO_THE_REPLICATION_PACKAGE")

#################################################################################
# 0.2.1 CSTS
#################################################################################

# Panel data
panel.data = read_dta("./DataCleaned.dta")
panel.data = panel.data %>%
  mutate(n=1) %>%
  group_by(hhid) %>% # Keep obs that were in both waves
  filter(n()==2)

panel.data = as.data.frame(panel.data)

#################################################################################
# Scatter plots
#################################################################################

scatter1 = ggplot(data=panel.data, aes(x=mean_saubhagya,y=mean_hours)) + 
  geom_jitter(width=0.02, height=0.2, alpha = 1/10) +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), size=1 ) +
  scale_x_continuous(name="Mean Saubhagya", limits=c(0, 1)) +
  scale_y_continuous(name="Mean hours of electricity/day", limits=c(0, 24))
ggsave("./scatter_saubhagya_hours.pdf", 
       plot = scatter1, width = 6, height = 5, units = "in")


scatter2 = ggplot(data=panel.data, aes(x=mean_saubhagya,y=mean_grid)) + 
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), size=1 ) +
  geom_jitter(width=0.02, height=0.02, alpha = 1/10) +
  scale_x_continuous(name="Mean Saubhagya", limits=c(0, 1)) +
  scale_y_continuous(name="Mean grid connections", limits=c(0, 1))
ggsave("./scatter_saubhagya_grid.pdf", 
       plot = scatter2, width = 6, height = 5, units = "in")

scatter3 = ggplot(data=panel.data, aes(x=mean_saubhagya,y=dmean_hours)) + 
  geom_jitter(width=0.02, height=0.2, alpha = 1/10) +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), size=1 ) +
  scale_x_continuous(name="Mean Saubhagya", limits=c(0, 1)) +
  scale_y_continuous(name="Change in mean hours of electricity/day", limits=c(-20, 20))
ggsave("./scatter_saubhagya_dhours.pdf", 
       plot = scatter3, width = 6, height = 5, units = "in")


#################################################################################
# Sankey
#################################################################################


sankey_primary = ggplot(panel.data,
                         aes(x = wave, 
                             stratum = overall_primary, 
                             alluvium = hhid, 
                             y = 100*n/(length(n)/2), # To get percentages
                             fill = overall_primary)) +
  scale_x_discrete(limits = c("2015", "2018")) +
  geom_flow(width = 1/4) +
  geom_stratum(alpha = .7, width = 1/4) +
  ggrepel::geom_text_repel(
    aes(label = ifelse(as.numeric(wave) == 1, as.character(overall_primary), NA)),
    stat = "stratum", size = 4, direction = "y", nudge_x = -.5) +
  ggrepel::geom_text_repel(
    aes(label = ifelse(as.numeric(wave) == 2, as.character(overall_primary), NA)),
    stat = "stratum", size = 4, direction = "y", nudge_x = .5) +
  theme_bw() +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("")
ggsave("./sankey_primary.pdf", 
       plot = sankey_primary, width = 6, height = 5, units = "in")


panel.data$own_grid = as.factor(panel.data$own_grid)
panel.data$own_shs = as.factor(panel.data$own_shs)
panel.data$own_dieselmg = as.factor(panel.data$own_dieselmg)
panel.data$own_solarmg = as.factor(panel.data$own_solarmg)
panel.data$own_solarlantern = as.factor(panel.data$own_solarlantern)
panel.data$own_kerosenelantern = as.factor(panel.data$own_kerosenelantern)

sankey.grid = ggplot(panel.data,
       aes(x = wave, 
           stratum = own_grid, 
           alluvium = hhid, 
           y = 100*n/(length(n)/2), # To get percentages
           fill = own_grid)) +
  scale_x_discrete(limits = c("2015", "2018"))  + #, expand=c(0,5)
  coord_fixed(ratio = 0.1) +
  geom_flow(width = 0.1) +
  geom_stratum(alpha = .7, width = 0.1) +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("") + 
  theme_bw() + 
  theme(plot.subtitle = element_text(hjust = 0.5), 
        legend.position = "none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) +
  labs(subtitle="Grid") +
  ylab(NULL)

panel.data.2 = panel.data %>%
  group_by(hhid) %>%
  filter(!any(own_shs == "NA"))

sankey.shs = ggplot(data=panel.data.2,
                     aes(x = wave, 
                         stratum = own_shs, 
                         alluvium = hhid, 
                         y = 100*n/(length(n)/2), # To get percentages
                         fill = own_shs)) +
  scale_x_discrete(limits = c("2015", "2018"))  + #, expand=c(0,5)
  coord_fixed(ratio = 0.1) +
  geom_flow(width = 0.1) +
  geom_stratum(alpha = .7, width = 0.1) +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("") + 
  theme_bw() + 
  theme(plot.subtitle = element_text(hjust = 0.5), 
        legend.position = "none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) +
  labs(subtitle="SHS") +
  ylab(NULL)

sankey.diesel = ggplot(panel.data,
                    aes(x = wave, 
                        stratum = own_dieselmg, 
                        alluvium = hhid, 
                        y = 100*n/(length(n)/2), # To get percentages
                        fill = own_dieselmg)) +
  scale_x_discrete(limits = c("2015", "2018"))  + #, expand=c(0,5)
  coord_fixed(ratio = 0.1) +
  geom_flow(width = 0.1) +
  geom_stratum(alpha = .7, width = 0.1) +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("") + 
  theme_bw() + 
  theme(plot.subtitle = element_text(hjust = 0.5), 
        legend.position = "none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) +
  labs(subtitle="Diesel MG") +
  ylab(NULL)

sankey.solarmg = ggplot(panel.data,
                    aes(x = wave, 
                        stratum = own_solarmg, 
                        alluvium = hhid, 
                        y = 100*n/(length(n)/2), # To get percentages
                        fill = own_solarmg)) +
  scale_x_discrete(limits = c("2015", "2018"))  + #, expand=c(0,5)
  coord_fixed(ratio = 0.1) +
  geom_flow(width = 0.1) +
  geom_stratum(alpha = .7, width = 0.1) +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("") + 
  theme_bw() + 
  theme(plot.subtitle = element_text(hjust = 0.5), 
        legend.position = "none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) +
  labs(subtitle="Solar MG") +
  ylab(NULL)


panel.data.3 = panel.data %>%
  group_by(hhid) %>%
  filter(!any(own_solarlantern == "NA"))

sankey.solarlantern = ggplot(panel.data.3,
                        aes(x = wave, 
                            stratum = own_solarlantern, 
                            alluvium = hhid, 
                            y = 100*n/(length(n)/2), # To get percentages
                            fill = own_solarlantern)) +
  scale_x_discrete(limits = c("2015", "2018"))  + #, expand=c(0,5)
  coord_fixed(ratio = 0.1) +
  geom_flow(width = 0.1) +
  geom_stratum(alpha = .7, width = 0.1) +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("") + 
  theme_bw() + 
  theme(plot.subtitle = element_text(hjust = 0.5), 
        legend.position = "none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) +
  labs(subtitle="Solar L") +
  ylab(NULL)


sankey.kerosenelantern = ggplot(panel.data.2,
                             aes(x = wave, 
                                 stratum = own_kerosenelantern, 
                                 alluvium = hhid, 
                                 y = 100*n/(length(n)/2), # To get percentages
                                 fill = own_kerosenelantern)) +
  scale_x_discrete(limits = c("2015", "2018"))  + #, expand=c(0,5)
  coord_fixed(ratio = 0.1) +
  geom_flow(width = 0.1) +
  geom_stratum(alpha = .7, width = 0.1) +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("") + 
  theme_bw() + 
  theme(plot.subtitle = element_text(hjust = 0.5), 
        legend.position = "none",
        plot.margin = unit(c(0, 0, 0, 0), "cm")) +
  labs(subtitle="Kerosene L.") +
  ylab(NULL)

library("ggpubr")

sankey.ownership = ggarrange(sankey.grid, 
                  sankey.kerosenelantern, 
                  sankey.shs,
                  sankey.solarlantern,
                  sankey.diesel,
                  sankey.solarmg,
                  nrow=1) + 
  theme(plot.margin=unit(c(0,0,0,0),"mm"))
ggsave(plot = sankey.ownership, "./sankey_ownership.pdf", 
        width = 6.5, height = 5, units = "in")

# Need to crop white space on the top/bottom; not sure why it's there, but this works
system(paste("pdfcrop", "./sankey_ownership.pdf",
             "./sankey_ownership_crop.pdf",
             "--margin=0 1 0 0"))
             

panel.data.4 = panel.data %>%
  group_by(hhid) %>%
  filter(!any(lighting_group == "Other combo"))


sankey_combo_1 = ggplot(panel.data,
                        aes(x = wave, 
                            stratum = lighting_group, 
                            alluvium = hhid, 
                            y = 100*n/(length(n)/2), # To get percentages
                            fill = lighting_group)) +
  scale_x_discrete(limits = c("2015", "2018")) +
  geom_flow(width = 1/4) +
  geom_stratum(alpha = .7, width = 1/4) +
  ggrepel::geom_text_repel(
    aes(label = ifelse(as.numeric(wave) == 1, as.character(lighting_group), NA)),
    stat = "stratum", size = 4, direction = "y", nudge_x = -.5) +
  ggrepel::geom_text_repel(
    aes(label = ifelse(as.numeric(wave) == 2, as.character(lighting_group), NA)),
    stat = "stratum", size = 4, direction = "y", nudge_x = .5) +
  theme_bw() +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("")
ggsave("./sankey_combo_v1.pdf", 
       plot = sankey_combo_1, width = 6, height = 5, units = "in")

sankey_combo_2 = ggplot(panel.data.4,
                        aes(x = wave, 
                            stratum = lighting_group, 
                            alluvium = hhid, 
                            y = 100*n/(length(n)/2), # To get percentages
                            fill = lighting_group)) +
  scale_x_discrete(limits = c("2015", "2018")) +
  geom_flow(width = 1/4) +
  geom_stratum(alpha = .7, width = 1/4) +
  ggrepel::geom_text_repel(
    aes(label = ifelse(as.numeric(wave) == 1, as.character(lighting_group), NA)),
    stat = "stratum", size = 4, direction = "y", nudge_x = -.5) +
  ggrepel::geom_text_repel(
    aes(label = ifelse(as.numeric(wave) == 2, as.character(lighting_group), NA)),
    stat = "stratum", size = 4, direction = "y", nudge_x = .5) +
  theme_bw() +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("")
ggsave("./sankey_combo_v2.pdf", 
       plot = sankey_combo_2, width = 6, height = 5, units = "in")


sankey_combo_3 = ggplot(panel.data,
                        aes(x = wave, 
                            stratum = dist_lighting_group, 
                            alluvium = hhid, 
                            y = 100*n/(length(n)/2), # To get percentages
                            fill = dist_lighting_group)) +
  scale_x_discrete(limits = c("2015", "2018")) +
  geom_flow(width = 1/4) +
  geom_stratum(alpha = .7, width = 1/4) +
  ggrepel::geom_text_repel(
    aes(label = ifelse(as.numeric(wave) == 1, as.character(dist_lighting_group), NA)),
    stat = "stratum", size = 4, direction = "y", nudge_x = -.5) +
  ggrepel::geom_text_repel(
    aes(label = ifelse(as.numeric(wave) == 2, as.character(dist_lighting_group), NA)),
    stat = "stratum", size = 4, direction = "y", nudge_x = .5) +
  theme_bw() +
  theme(legend.position = "none") +
  ylab("Percent Households") +
  xlab("")
ggsave("./sankey_combo_v3.pdf", 
       plot = sankey_combo_3, width = 6, height = 5, units = "in")
